#This is to get:
#Table6
rm(list = ls())
###########
#Libraries#
###########
library("xtable")
library("CBPS")
library("rddtools")
library("Matching")
library("rgenoud")

setwd("/Users/bgpopescu/Dropbox/Legacies_Central_Europe/")
setwd("C:/Users/bogdanp/Dropbox/Legacies_Central_Europe/")

##########
#Read CSV#
##########
data_1930 <- read.csv(file = './data/data_ro_1930.csv')

###################
#Defining Distance#
###################

data_1930$dist1 <- as.numeric( with (data_1930,ifelse(data_1930$treat==1, 1, -1)))
data_1930$dist2 <-data_1930$dist1*data_1930$Ott_Habs_brd_distance

data_1930$bfe1<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==1, 1, 0)
data_1930$bfe2<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==9, 1, 0)
data_1930$bfe3<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==10, 1, 0)
data_1930$bfe4<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==11, 1, 0)
data_1930$bfe5<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==12, 1, 0)
data_1930$bfe6<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==13, 1, 0)
data_1930$bfe7<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==14, 1, 0)
data_1930$bfe8<-ifelse(data_1930$Ott_Habs_brd_NEAR_FID==15, 1, 0)

#####################
#Step1. Total Effect#
#####################
ef1<-lm(pct_literate~treat + 
          Ott_Habs_brd_distance+
          POINT_X +
          POINT_Y +
          bfe1 + bfe2 + bfe3 +
          bfe4 + bfe5 + bfe6 + bfe7 +
          tavg_mean + 
          prec_mean + 
          sea_lines_distance+
          pct_hungarian +pct_romanian +
          pct_german, data=data_1930)
summary(ef1)

#Step1_a: Making coefficient
ef1_coef_star<- paste(round(ef1$coefficients[["treat"]], digits = 3),
                      "", ifelse(summary(ef1)$coefficients[,4][["treat"]]<.001,"***",
                                 ifelse(summary(ef1)$coefficients[,4][["treat"]]<.01,"**",
                                        ifelse(summary(ef1)$coefficients[,4][["treat"]]<.05,"*",
                                               ifelse(summary(ef1)$coefficients[,4][["treat"]]<.1,"+",
                                                      "")))), "", "", sep = "")



#Step1_b: Making column
full_sample <-list("Full Sample",
                   ef1_coef_star,
                   paste("(", round(summary(ef1)$coefficients[,2][["treat"]], digits = 3), ")", sep =""),
                   length(fitted(ef1)),
                   round(summary(ef1)$adj.r.squared, digits = 3))

###########################################
#2. Effect optimal - Imbens and Kalyamaran#
###########################################

#Step2_a: Set outcome, forcing and cutoff variable
rdd2_obj1 = rdd_data(y = pct_literate, x = dist2, 
                     cutpoint = 0,
                     data = data_1930)


#Step2_b: Restrict sample to bandwidth area
bw_ik <- rdd_bw_ik(rdd2_obj1)
optimal_band<-round(bw_ik)

#Step2_c: Running the analysis
ef2<-lm(pct_literate~treat + 
          Ott_Habs_brd_distance+
          POINT_X +
          POINT_Y +
          bfe1 + bfe2 + bfe3 +
          bfe4 + bfe5 + bfe6 + bfe7 +
          tavg_mean + 
          prec_mean + 
          sea_lines_distance+
          pct_hungarian +pct_romanian +
          pct_german, data=subset(data_1930, Ott_Habs_brd_distance<optimal_band))

#Step2_d: Making coefficient
ef2_coef_star<- paste(round(ef2$coefficients[["treat"]], digits = 3),
                      "", ifelse(summary(ef2)$coefficients[,4][["treat"]]<.001,"***",
                                 ifelse(summary(ef2)$coefficients[,4][["treat"]]<.01,"**",
                                        ifelse(summary(ef2)$coefficients[,4][["treat"]]<.05,"*",
                                               ifelse(summary(ef2)$coefficients[,4][["treat"]]<.1,"+",
                                                      "")))), "", "", sep = "")
#Step2_e: Making column
optimal_sample <-list("Optimal Sample",
                      ef2_coef_star,
                      paste("(", round(summary(ef2)$coefficients[,2][["treat"]], digits = 3), ")", sep =""),
                      length(fitted(ef2)),
                      round(summary(ef2)$adj.r.squared, digits = 3))

####################
#3.Genetic Matching#
####################

#Step3_a: The covariates we want to match on
X = cbind(data_1930$Ott_Habs_brd_distance,
          data_1930$POINT_X,
          data_1930$POINT_Y,
          data_1930$bfe1, data_1930$bfe2, data_1930$bfe3,
          data_1930$bfe4, data_1930$bfe5, data_1930$bfe6, data_1930$bfe7,
          data_1930$tavg_mean, data_1930$prec_mean,
          data_1930$sea_lines_distance,
          data_1930$pct_hungarian, data_1930$pct_romanian, data_1930$pct_german)


##Step3_b: The covariates we want to obtain balance on. This is more like the IV we are trying to include in the model
BalanceMat <- cbind(data_1930$Ott_Habs_brd_distance,
                    data_1930$POINT_X,
                    data_1930$POINT_Y,
                    data_1930$bfe1, data_1930$bfe2, data_1930$bfe3,
                    data_1930$bfe4, data_1930$bfe5, data_1930$bfe6, data_1930$bfe7,
                    data_1930$tavg_mean, data_1930$prec_mean,
                    data_1930$sea_lines_distance,
                    data_1930$pct_hungarian, data_1930$pct_romanian, data_1930$pct_german)


#Step3_c: Optimal weight 
#Let's call GenMatch() to find the optimal weight to give each
#covariate in 'X' so as we have achieved balance on the covariates in #'BalanceMat'.
#BalanceMatrix should generally contain the variables and the function of these variables that we wish to balance.
#This is only an example so we want GenMatch to be quick
#so the population size has been set to be only 1000 via the 'pop.size'
#option. pop.size argument is important and greatly influences how long the function takes to run. 
## one-to-one matching with replacement (the "M=1" option).

genout <- GenMatch(Tr=data_1930$treat, X=X, BalanceMatrix=BalanceMat, estimand="ATE", M=1,
                   pop.size=1000, max.generations=10, wait.generations=1)


#Step3_d: The outcome variable
Y=data_1930$pct_literate

#Step3_e: The causal effect
# Now that GenMatch() has found the optimal weights, let's estimate
# our causal effect of interest using those weights
## one-to-one matching with replacement (the "M=1" option).
mout <- Match(Y=Y, Tr=data_1930$treat, X=X, Weight.matrix=genout) 
summary(mout)

#Step3_f: Balance on vars of interest
#Let's determine if balance has actually been obtained on the variables of interest #
mb <- MatchBalance(treat~Ott_Habs_brd_distance + 
                     POINT_X + POINT_Y +
                     bfe1 + bfe2 + bfe3 +
                     bfe4 + bfe5 + bfe6 + bfe7 +
                     tavg_mean + prec_mean + 
                     sea_lines_distance+
                     pct_hungarian + pct_romanian + pct_german,
                   data=data_1930,
                   match.out=mout, nboots=1000)
summary(mout)

#Step3_g: Making coefficient
estim<-as.numeric(mout$est)
p_val_gen = (1 - pnorm(abs(mout$est/mout$se))) * 2
gen_coef_star<- paste(round(mout$est, digits = 3),
                      "", ifelse(p_val_gen<.001,"***",
                                 ifelse(p_val_gen<.01,"**",
                                           ifelse(p_val_gen<.05,"*",
                                                  ifelse(p_val_gen<.1,"+",
                                                  "")))), "", "", sep = "")


#Step3_h: Making column
genetic_matching <-list("Genetic Matching",
                      gen_coef_star,
                      paste("(", round(mout$se, digits = 3), ")", sep =""),
                      mout$wnobs,
                      paste(""))

#######################################################
#4. CBPS - Covariate Balance Propensity Score Matching#
#######################################################
#Step4_a: Running CBPS
model_balanced <- CBPS(treat~Ott_Habs_brd_distance+
                         POINT_X +
                         POINT_Y +
                         bfe1 + bfe2 + bfe3 +
                         bfe4 + bfe5 + bfe6 + bfe7 +
                         tavg_mean + 
                         prec_mean + 
                         sea_lines_distance+
                         pct_hungarian +pct_romanian +
                         pct_german, 
                       data = data_1930, ATT = TRUE, na.action = na.exclude)
summary(model_balanced)

#Step4_b: Including Weights
data_1930$weights <- model_balanced$weights
model_balanced<-lm(pct_literate~treat + 
                     Ott_Habs_brd_distance+
                     POINT_X +
                     POINT_Y +
                     bfe1 + bfe2 + bfe3 +
                     bfe4 + bfe5 + bfe6 + bfe7 +
                     tavg_mean + 
                     prec_mean + 
                     sea_lines_distance+
                     pct_hungarian +pct_romanian +
                     pct_german, 
                   data = data_1930, weights = data_1930$weights)
summary(model_balanced)

#Step4_c: Calculating SE 
model_balanced$coefficients
se_model_balanced <- sqrt(diag(vcov(model_balanced)))

#Step4_d: Making coefficient
model_balanced_coef_star<- paste(round(model_balanced$coefficients[["treat"]], digits = 3),
                      "", ifelse(summary(model_balanced)$coefficients[,4][["treat"]]<.001,"***",
                                    ifelse(summary(model_balanced)$coefficients[,4][["treat"]]<.01,"**",
                                           ifelse(summary(model_balanced)$coefficients[,4][["treat"]]<.05,"*",
                                                  ifelse(summary(model_balanced)$coefficients[,4][["treat"]]<.1,"+",
                                                        "")))), "", "", sep = "")

#Step4_d: Making column
cbps_lm <-list("CBPS",
               model_balanced_coef_star,
                      paste("(", round(se_model_balanced[["treat"]], digits = 3), ")", sep =""),
                      length(fitted(model_balanced)),
                      round(summary(model_balanced)$adj.r.squared, digits = 3))

#########################
#5. Excluding Hungarians#
#########################
#Step5_a: Running model
data_no_hung<-subset(data_1930, pct_hungarian<5)
ef_no_hung<-lm(pct_literate~treat + 
                 Ott_Habs_brd_distance+
                 POINT_X +
                 POINT_Y +
                 bfe1 + bfe2 + bfe3 +
                 bfe4 + bfe5 + bfe6 + bfe7 +
                 tavg_mean + 
                 prec_mean + 
                 sea_lines_distance+
                 pct_hungarian +pct_romanian +
                 pct_german, data=data_no_hung)
summary(ef_no_hung)

#Step5_b: Making coefficient
ef_no_hung_coef_star<- paste(round(ef_no_hung$coefficients[["treat"]], digits = 3),
                             "", ifelse(summary(ef_no_hung)$coefficients[,4][["treat"]]<.001,"***",
                                        ifelse(summary(ef_no_hung)$coefficients[,4][["treat"]]<.01,"**",
                                               ifelse(summary(ef_no_hung)$coefficients[,4][["treat"]]<.05,"*",
                                                      ifelse(summary(ef_no_hung)$coefficients[,4][["treat"]]<.001,"+",
                                                      "")))), "", "", sep = "")
#Step5_b: Making column
ef_no_hung_list <-list("Pct. Hungarian<5%",
                      ef_no_hung_coef_star,
                      paste("(", round(summary(ef_no_hung)$coefficients[,2][["treat"]], digits = 3), ")", sep =""),
                      length(fitted(ef_no_hung)),
                      round(summary(ef_no_hung)$adj.r.squared, digits = 3))



######################
#6. Excluding Germans#
######################
#Step6_a: Running model
data_no_german<-subset(data_1930, pct_german<2)
ef_no_german<-lm(pct_literate~treat + 
                   Ott_Habs_brd_distance+
                   POINT_X +
                   POINT_Y +
                   bfe1 + bfe2 + bfe3 +
                   bfe4 + bfe5 + bfe6 + bfe7 +
                   tavg_mean + 
                   prec_mean + 
                   sea_lines_distance+
                   pct_hungarian +pct_romanian +
                   pct_german, data= data_no_german)
summary(ef_no_german)

#Step6_b: Making coefficient
ef_no_german_coef_star<- paste(round(ef_no_german$coefficients[["treat"]], digits = 3),
                               "", ifelse(summary(ef_no_german)$coefficients[,4][["treat"]]<.001,"***",
                                          ifelse(summary(ef_no_german)$coefficients[,4][["treat"]]<.01,"**",
                                                 ifelse(summary(ef_no_german)$coefficients[,4][["treat"]]<.05,"*",
                                                        ifelse(summary(ef_no_german)$coefficients[,4][["treat"]]<.1,"+",
                                                        "")))), "", "", sep = "")

#Step6_c: Making coefficient
ef_no_german_list <-list("Pct. German<2%",
                         ef_no_german_coef_star,
                         paste("(", round(summary(ef_no_german)$coefficients[,2][["treat"]], digits = 3), ")", sep =""),
                         length(fitted(ef_no_german)),
                         round(summary(ef_no_german)$adj.r.squared, digits = 3))


##############
#MAKING TABLE#
##############

#Step1: Making varriable column
variables <- list("", "Ottoman", "", "Observations", "R-Squared")

#Step2: Binding columns
sd<-as.data.frame(cbind(
  variables,
  full_sample,
  optimal_sample,
  genetic_matching,
  cbps_lm,
  ef_no_hung_list,
  ef_no_german_list))

#Step3: Adding relevant information to table
as<-colnames(sd)
geog_ctrl<-list("Geographic Ctrls.", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes")
geog_ctrl<-as.data.frame(rbind(geog_ctrl))
colnames(geog_ctrl)<-as
final<-rbind(sd, geog_ctrl)

#Step4: Adding numbers to columns
colnames(final)[1] <- ""
colnames(final)[2] <- "(1)"
colnames(final)[3] <- "(2)"
colnames(final)[4] <- "(3)"
colnames(final)[5] <- "(4)"
colnames(final)[6] <- "(5)"
colnames(final)[7] <- "(6)"

#Step5: Creating the latex table
object_latex<-xtable(final, type = "latex", 
                     caption = "Results",
                     #Note there are 6 columns to be aligned below.
                     #This is because of the index column.
                     align = "llcccccc")


setwd("./Paper/tables")

#Adding footnotes
comment <- list(pos = list(0), command = NULL)
comment$pos[[1]] <- c(nrow(object_latex))
comment$command <- c(paste("\\bottomrule\n",
                           "\\multicolumn{7}{l} {\\parbox[t]{20cm}\n",
                           "{\\footnotesize \\textit{Note}: The dependent variable is percent literate in 1930. 
                           *** = p<.001; ** = p<.01, *=p<.05, +=p<.10.}} \\\\ \n", sep = ""))


print(object_latex,
      #The following line controls where the lines are drawn
      hline.after=c(-1, 1, 3),
      floating = FALSE,
      file = "table6.tex", 
      #caption.placement = "top",
      add.to.row = comment,
      booktabs = TRUE,
      include.rownames=FALSE)

